LIBRARIES

# install.packages("ggplot2", dependencies = TRUE)
# install.packages("data.table", dependencies = TRUE)
# install.packages("tidyr", dependencies = TRUE)
# install.packages("reshape2", dependencies = TRUE)
# install.packages("dplyr", dependencies = TRUE)
# install.packages("plotly", dependencies = TRUE)
# install.packages("shiny", dependencies = TRUE)
# install.packages("shinythemes", dependencies = TRUE)
# install.packages("DT", dependencies = TRUE)
library(stringr)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
library(data.table)
## Warning: package 'data.table' was built under R version 4.3.3
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.3.3
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.3.3
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
## The following objects are masked from 'package:data.table':
## 
##     dcast, melt
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plotly)
## Warning: package 'plotly' was built under R version 4.3.3
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(shiny)
## Warning: package 'shiny' was built under R version 4.3.3
library(shinythemes)
## Warning: package 'shinythemes' was built under R version 4.3.3
library(DT)
## Warning: package 'DT' was built under R version 4.3.3
## 
## Attaching package: 'DT'
## The following objects are masked from 'package:shiny':
## 
##     dataTableOutput, renderDataTable

DATASET

# Load the raw sleep/lifestyle survey; the relative path means the CSV must
# sit in the current working directory.
df_sleep <- read.csv(
  file = "Sleep_health_and_lifestyle_dataset.csv",
  header = TRUE,
  sep = ","
)

DATA PREPARATION

# Build the working copy of the data: drop the row identifier and turn the
# categorical columns into factors.
df_sleep2 <- df_sleep %>%
  select(-Person.ID) %>%
  mutate(
    Gender = as.factor(Gender),
    Occupation = as.factor(Occupation),
    # "Normal Weight" is kept as a level here so that no value is turned into
    # NA before it is merged into "Normal" below.
    BMI.Category = factor(
      BMI.Category,
      levels = c("Normal", "Normal Weight", "Overweight", "Obese")
    ),
    # Fix: the factor is stored in df_sleep2. It used to be assigned to the raw
    # df_sleep, which left this column as character in every later step.
    # Values outside the listed levels become NA.
    Sleep.Disorder = factor(
      Sleep.Disorder,
      levels = c("None", "Insomnia", "Sleep Apnea")
    )
  )

# Harmonise the two spellings of the normal-weight category.
df_sleep2 <- df_sleep2 %>%
  mutate(BMI.Category = recode(BMI.Category, "Normal Weight" = "Normal"))
# Split the "systolic/diastolic" text column into two integer columns.
df_sleep_new <- df_sleep2 %>%
  separate_wider_delim(
    cols = Blood.Pressure,
    delim = "/",
    names = c("Systolic.Pressure", "Diastolic.Pressure")
  ) %>%
  mutate(
    Systolic.Pressure = as.integer(Systolic.Pressure),
    Diastolic.Pressure = as.integer(Diastolic.Pressure)
  )
# Derive a blood-pressure category from the two pressure readings and store it
# as a factor.
#   Hypotension : systolic < 90   or  diastolic < 60
#   Normal      : systolic 90-119 and diastolic 60-79
#   Hypertension: systolic >= 120 or  diastolic >= 80
# case_when() uses the first condition that is TRUE. The last rule spells out
# the real thresholds: for complete readings it selects the same rows as the
# former catch-all (`> 90 | > 60`), but a row with a missing reading is no
# longer labelled "Hypertension" on the strength of one normal value; it
# stays NA.
df_sleep_new <- df_sleep_new %>%
  mutate(
    Tension.Category = case_when(
      Systolic.Pressure < 90 | Diastolic.Pressure < 60 ~ "Hypotension",
      Systolic.Pressure >= 90 & Systolic.Pressure <= 119 &
        Diastolic.Pressure >= 60 & Diastolic.Pressure <= 79 ~ "Normal",
      Systolic.Pressure >= 120 | Diastolic.Pressure >= 80 ~ "Hypertension"
    ),
    Tension.Category = as.factor(Tension.Category)
  )
# Keep one copy of every fully identical row. Person.ID is no longer part of
# the data at this point, so rows are compared on the remaining columns only.
df_sleep_clean <- distinct(df_sleep_new)

Check Outliers

  1. Sleep Duration
# Box plot of sleep duration within each sleep-disorder group (outlier check).
chart_1 <- ggplot(
  data = df_sleep_clean,
  mapping = aes(x = Sleep.Disorder, y = Sleep.Duration)
) +
  geom_boxplot() +
  theme_minimal() +
  labs(x = "Sleep Disorder", y = "Sleep Duration")

# Show the ggplot as an interactive plotly widget.
ggplotly(chart_1)
  2. Daily Steps
# Box plot of daily steps within each sleep-disorder group (outlier check).
chart_2 <- ggplot(
  data = df_sleep_clean,
  mapping = aes(x = Sleep.Disorder, y = Daily.Steps)
) +
  geom_boxplot() +
  theme_minimal() +
  labs(x = "Sleep Disorder", y = "Daily Steps")

# Show the ggplot as an interactive plotly widget.
ggplotly(chart_2)
  3. Heart Rate
# Box plot of heart rate within each sleep-disorder group (outlier check).
chart_3 <- ggplot(
  data = df_sleep_clean,
  mapping = aes(x = Sleep.Disorder, y = Heart.Rate)
) +
  geom_boxplot() +
  theme_minimal() +
  labs(x = "Sleep Disorder", y = "Heart Rate")

# Show the ggplot as an interactive plotly widget.
ggplotly(chart_3)
  4. Physical Activity Level
# Box plot of physical activity level within each sleep-disorder group
# (outlier check).
chart_4 <- ggplot(
  data = df_sleep_clean,
  mapping = aes(x = Sleep.Disorder, y = Physical.Activity.Level)
) +
  geom_boxplot() +
  theme_minimal() +
  labs(x = "Sleep Disorder", y = "Physical Activity Level")

# Show the ggplot as an interactive plotly widget.
ggplotly(chart_4)
  5. Stress Level
# Box plot of stress level within each sleep-disorder group (outlier check).
chart_5 <- ggplot(
  data = df_sleep_clean,
  mapping = aes(x = Sleep.Disorder, y = Stress.Level)
) +
  geom_boxplot() +
  theme_minimal() +
  labs(x = "Sleep Disorder", y = "Stress Level")

# Show the ggplot as an interactive plotly widget.
ggplotly(chart_5)
  6. Quality of Sleep
# Box plot of sleep quality within each sleep-disorder group (outlier check).
chart_6 <- ggplot(
  data = df_sleep_clean,
  mapping = aes(x = Sleep.Disorder, y = Quality.of.Sleep)
) +
  geom_boxplot() +
  theme_minimal() +
  labs(x = "Sleep Disorder", y = "Quality of Sleep")

# Show the ggplot as an interactive plotly widget.
ggplotly(chart_6)

Insight: The box plots above show some outliers in the data. We grouped the data by Sleep Disorder because we consider it the most suitable factor to gain insight from.

Because we want to preserve the original data, we do not remove or adjust the outliers.

Statistical Summary of Each Column

# Print per-column summary statistics (quantiles for numeric columns, level
# counts for factors).
summary(df_sleep_clean)
##     Gender        Age             Occupation Sleep.Duration  Quality.of.Sleep
##  Female:65   Min.   :27.00   Nurse     :29   Min.   :5.800   Min.   :4.000   
##  Male  :67   1st Qu.:33.75   Doctor    :24   1st Qu.:6.400   1st Qu.:6.000   
##              Median :41.00   Engineer  :22   Median :7.150   Median :7.000   
##              Mean   :41.13   Lawyer    :15   Mean   :7.083   Mean   :7.152   
##              3rd Qu.:49.00   Teacher   :15   3rd Qu.:7.725   3rd Qu.:8.000   
##              Max.   :59.00   Accountant:11   Max.   :8.500   Max.   :9.000   
##                              (Other)   :16                                   
##  Physical.Activity.Level  Stress.Level       BMI.Category Systolic.Pressure
##  Min.   :30.00           Min.   :3.000   Normal    :73    Min.   :115.0    
##  1st Qu.:44.25           1st Qu.:4.000   Overweight:52    1st Qu.:120.8    
##  Median :60.00           Median :6.000   Obese     : 7    Median :130.0    
##  Mean   :58.39           Mean   :5.538                    Mean   :128.4    
##  3rd Qu.:75.00           3rd Qu.:7.000                    3rd Qu.:135.0    
##  Max.   :90.00           Max.   :8.000                    Max.   :142.0    
##                                                                            
##  Diastolic.Pressure   Heart.Rate    Daily.Steps    Sleep.Disorder    
##  Min.   :75.00      Min.   :65.0   Min.   : 3000   Length:132        
##  1st Qu.:80.00      1st Qu.:68.0   1st Qu.: 5000   Class :character  
##  Median :85.00      Median :70.0   Median : 7000   Mode  :character  
##  Mean   :84.54      Mean   :71.2   Mean   : 6638                     
##  3rd Qu.:88.50      3rd Qu.:74.0   3rd Qu.: 8000                     
##  Max.   :95.00      Max.   :86.0   Max.   :10000                     
##                                                                      
##      Tension.Category
##  Hypertension:117    
##  Normal      : 15    
##                      
##                      
##                      
##                      
## 

DATA VISUALIZATION

  1. Sleep Disorder Composition based on Occupation
# 100% stacked bar chart: share of each sleep disorder within every occupation.
# The counts come from a two-way contingency table (columns Var1 = occupation,
# Var2 = sleep disorder, Freq = count); position = "fill" rescales each bar to
# 100%, and the per-occupation percentage is only used for the hover text.
vis_1 <- table(df_sleep_clean$Occupation, df_sleep_clean$Sleep.Disorder) %>%
  as.data.frame() %>%
  group_by(Var1) %>%
  mutate(Percentage = 100 * Freq / sum(Freq)) %>%
  ungroup() %>%
  mutate(
    Tooltip = paste(
      "Occupation :", Var1,
      "<br>Sleep Disorder :", Var2,
      "<br>Percentage :", round(Percentage, 1), "%"
    )
  ) %>%
  ggplot(aes(x = Var1, y = Freq, fill = Var2, text = Tooltip)) +
  geom_col(position = "fill") +
  scale_x_discrete(labels = function(x) str_wrap(x, width = 10)) +
  scale_y_continuous(labels = scales::percent_format()) +
  scale_fill_manual(values = c("#9BDF96", "#5D9D57", "#3C7237")) +
  labs(
    title = "Percentage of Sleep Disorder per Occupation",
    x = "Occupation",
    y = "Percentage",
    fill = "Sleep Disorder"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 0, vjust = 0.5, hjust = 0.5),
    plot.margin = margin(t = 20)
  )

# Interactive version; the hover box shows only the custom text.
ggplotly(vis_1, tooltip = "text")

Insight: Sales Representative has the highest percentage of Sleep Apnea, while Salesperson has the highest percentage of Insomnia.

  2. BMI Category Composition based on Occupation
# 100% stacked bar chart: share of each BMI category within every occupation.
# The counts come from a two-way contingency table (columns Var1 = occupation,
# Var2 = BMI category, Freq = count); position = "fill" rescales each bar to
# 100%, and the per-occupation percentage is only used for the hover text.
vis_2 <- table(df_sleep_clean$Occupation, df_sleep_clean$BMI.Category) %>%
  as.data.frame() %>%
  group_by(Var1) %>%
  mutate(Percentage = 100 * Freq / sum(Freq)) %>%
  ungroup() %>%
  mutate(
    Tooltip = paste(
      "Occupation :", Var1,
      "<br>BMI Category :", Var2,
      "<br>Percentage :", round(Percentage, 1), "%"
    )
  ) %>%
  ggplot(aes(x = Var1, y = Freq, fill = Var2, text = Tooltip)) +
  geom_col(position = "fill") +
  scale_x_discrete(labels = function(x) str_wrap(x, width = 10)) +
  scale_y_continuous(labels = scales::percent_format()) +
  scale_fill_manual(values = c("#9BDF96", "#5D9D57", "#3C7237")) +
  labs(
    title = "Percentage of BMI Category per Occupation",
    x = "Occupation",
    y = "Percentage",
    fill = "BMI Category"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 0, vjust = 0.5, hjust = 0.5),
    plot.margin = margin(t = 20)
  )

# Interactive version; the hover box shows only the custom text.
ggplotly(vis_2, tooltip = "text")

Insight: Sales Representative has the highest percentage of Obese, while Salesperson, Manager, and Scientist have the highest percentages of Overweight.

  3. Sleep Disorder Composition based on BMI Category
# 100% stacked bar chart: share of each sleep disorder within every BMI
# category. The counts come from a two-way contingency table (columns
# Var1 = BMI category, Var2 = sleep disorder, Freq = count); position = "fill"
# rescales each bar to 100%, and the percentage is only used for the hover text.
vis_3 <- table(df_sleep_clean$BMI.Category, df_sleep_clean$Sleep.Disorder) %>%
  as.data.frame() %>%
  group_by(Var1) %>%
  mutate(Percentage = 100 * Freq / sum(Freq)) %>%
  ungroup() %>%
  mutate(
    Tooltip = paste(
      "BMI Category :", Var1,
      "<br>Sleep Disorder :", Var2,
      "<br>Percentage :", round(Percentage, 1), "%"
    )
  ) %>%
  ggplot(aes(x = Var1, y = Freq, fill = Var2, text = Tooltip)) +
  geom_col(position = "fill") +
  scale_y_continuous(labels = scales::percent_format()) +
  scale_fill_manual(values = c("#9BDF96", "#5D9D57", "#3C7237")) +
  labs(
    title = "Percentage of Sleep Disorder per BMI Category",
    x = "BMI Category",
    y = "Percentage",
    fill = "Sleep Disorder"
  ) +
  theme_minimal()

# Interactive version; the hover box shows only the custom text.
ggplotly(vis_3, tooltip = "text")

Insight: People with a normal weight are more likely to have no sleep disorder than people in the other BMI categories, and no obese person in the data is free of a sleep disorder.

  4. Sleep Quality based on BMI Category
# Horizontal box plots of sleep quality, one box per BMI category.
# NOTE(review): width = 5 is much wider than the one-unit spacing between
# categories; presumably tuned for how ggplotly draws the boxes - confirm.
vis_4 <- ggplot(
  data = df_sleep_clean,
  mapping = aes(
    x = BMI.Category,
    y = Quality.of.Sleep,
    fill = BMI.Category
  )
) +
  geom_boxplot(width = 5) +
  # Make both axes include zero.
  expand_limits(x = 0, y = 0) +
  # Flip so the categories run down the vertical axis.
  coord_flip() +
  scale_fill_manual(values = c("#9BDF96", "#5D9D57", "#3C7237")) +
  labs(
    title = "Quality of Sleep based on BMI Category",
    x = "BMI Category",
    y = "Quality of Sleep",
    fill = "BMI Category"
  ) +
  theme_minimal()

# Show the ggplot as an interactive plotly widget.
ggplotly(vis_4)

Insight: People with a normal weight have the best quality of sleep. People with the worst sleep quality are more likely to be in a higher BMI category.

  5. Sleep Duration based on BMI Category
# Horizontal box plots of sleep duration, one box per BMI category.
# NOTE(review): width = 5 is much wider than the one-unit spacing between
# categories; presumably tuned for how ggplotly draws the boxes - confirm.
vis_5 <- ggplot(
  data = df_sleep_clean,
  mapping = aes(
    x = BMI.Category,
    y = Sleep.Duration,
    fill = BMI.Category
  )
) +
  geom_boxplot(width = 5) +
  # Make both axes include zero.
  expand_limits(x = 0, y = 0) +
  # Flip so the categories run down the vertical axis.
  coord_flip() +
  scale_fill_manual(values = c("#9BDF96", "#5D9D57", "#3C7237")) +
  labs(
    title = "Sleep Duration based on BMI Category",
    x = "BMI Category",
    y = "Sleep Duration",
    fill = "BMI Category"
  ) +
  theme_minimal()

# Show the ggplot as an interactive plotly widget.
ggplotly(vis_5)

Insight: People with a normal weight have the healthiest sleep duration. People with shorter sleep duration are more likely to be in a higher BMI category.

  6. Age Distribution based on Sleep Disorder
# Horizontal box plots of age, one box per sleep-disorder group.
# NOTE(review): width = 5 is much wider than the one-unit spacing between
# categories; presumably tuned for how ggplotly draws the boxes - confirm.
vis_6 <- ggplot(
  data = df_sleep_clean,
  mapping = aes(
    x = Sleep.Disorder,
    y = Age,
    fill = Sleep.Disorder
  )
) +
  geom_boxplot(width = 5) +
  # Make both axes include zero.
  expand_limits(x = 0, y = 0) +
  # Flip so the categories run down the vertical axis.
  coord_flip() +
  scale_fill_manual(values = c("#9BDF96", "#5D9D57", "#3C7237")) +
  labs(
    title = "Age Distribution based on Sleep Disorder",
    x = "Sleep Disorder",
    y = "Age",
    fill = "Sleep Disorder"
  ) +
  theme_minimal()

# Show the ggplot as an interactive plotly widget.
ggplotly(vis_6)

Insight: Older people are more likely than younger people to have Sleep Apnea.

CORRELATION ANALYSIS

# Correlation heatmap of all numeric columns.
# Pipeline: keep numeric columns -> correlation matrix -> long format with the
# columns Var1, Var2 and value -> one labelled tile per variable pair.
# Note: the object name `heatmap` masks stats::heatmap() for the rest of the
# script.
heatmap <- df_sleep_clean %>%
  select(where(is.numeric)) %>%
  cor() %>%
  # Both data.table and reshape2 export melt(); the matrix method lives in
  # reshape2, so call it explicitly instead of relying on the attach order.
  reshape2::melt() %>%
  ggplot(aes(x = Var1,
             y = Var2,
             fill = value,
             label = round(value, 2),
             text = paste("Variable 1 :", Var1,
                          "<br>Variable 2 :", Var2,
                          "<br>Correlation Coefficient :", round(value, 2)))) +
  geom_tile(color = "white") +
  geom_text(color = "black") +
  # Diverging palette centred on zero; `limits` is spelled out in full rather
  # than relying on partial argument matching of `limit`.
  scale_fill_gradient2(low = "#FF751A",
                       mid = "white",
                       high = "#3C7237",
                       midpoint = 0,
                       limits = c(-1, 1),
                       space = "Lab",
                       name = "Correlation") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 0,
                                   vjust = 0.5,
                                   hjust = 0.5),
        axis.text.y = element_text(hjust = 1)) +
  labs(title = "Correlation Heatmap",
       x = "",
       y = "") +
  # Axis labels: replace the dots in the column names with spaces and wrap.
  scale_x_discrete(labels = function(x) str_wrap(str_replace_all(x, "\\.", " "), width = 10)) +
  scale_y_discrete(labels = function(x) str_wrap(str_replace_all(x, "\\.", " "), width = 10))

# Interactive version; the hover box shows only the custom text.
ggplotly(heatmap, tooltip = "text")

Insight:

Positively correlated variables:

- Sleep Duration and Quality of Sleep
- Systolic and Diastolic Pressure
- Physical Activity Level and Daily Steps

Negatively correlated variables:

- Stress Level and Quality of Sleep
- Stress Level and Sleep Duration
- Quality of Sleep and Heart Rate

# Names of the numeric columns of the cleaned data; offered as choices in the
# variable pickers of the "Linear Regression" and "Hypothesis Testing" tabs.
numeric_columns <- names(df_sleep_clean)[
  vapply(df_sleep_clean, is.numeric, logical(1))
]

# Shiny user interface: a page title followed by a navigation bar with one tab
# per step of the analysis (group members, raw data, data preparation,
# visualisation, correlation, linear regression, hypothesis testing).
# Most sections show the code as text (verbatimTextOutput "codeN") followed by
# its output; the output ids used here are filled in by `server`.
ui <- fluidPage(
  # Website Theme
  theme = shinytheme("cosmo"),
  
  title = "Assurance of Learning Data Mining and Visualization Group 3",
  
  # Page Title
  titlePanel(tags$b("Assurance of Learning Data Mining and Visualization Group 3")),
  
  # Navigation Bar
  navbarPage(
    "Navigation",
    
    # Display Group
    
    tabPanel(
      "Group Members",
      
      HTML("<h4><b>Class : LG09</b></h4>
      <h4><b>Group : 3</b></h4>
      <h5><b>1. Clarissa Beatrice Kosasih / 2702209350</b></h5>
      <h5><b>2. Marcelline Cathrine Wilison / 2702210604 </b></h5>
      <h5><b>3. Miecel Alicia Angel J / 2702327601</b></h5>
      <h5><b>4. William / 2702225373</b></h5>
      <h5><b>5. William Darma Wijaya / 2702218645</b></h5>")
    ),
    
    # Display Raw Dataset
    tabPanel(
      "Raw Dataset",
      
      HTML("<h3><b>Sleep Health and Lifestyle Dataset</b></h3>"),
      
      HTML("<h4><b>Code</b></h4>"),
      verbatimTextOutput("code1"),
      
      HTML("<h4><b>Output</b></h4>"),
      DTOutput("dataset")
    ),
    
    # Data Preparation
    tabPanel(
      "Data Preparation",
      
      tabsetPanel(
        
        # Assess General Characteristics
        tabPanel(
          tags$b("Assess General Characteristics"),
          
          HTML("<h3><b>Dataset Structures</b></h3>"),
          
          HTML("<h4><b>Code</b></h4>"),
          verbatimTextOutput("code2"),
          
          HTML("<h4><b>Output</b></h4>"),
          verbatimTextOutput("str"),
          
          HTML("<h3><b>Remove Person.ID Column</b></h3>"),
          
          HTML("<h4><b>Code</b></h4>"),
          verbatimTextOutput("code3"),
          
          HTML("<h3><b>Change Categorical Column to Factor Data Type</b></h3>"),
          
          HTML("<h4><b>Code</b></h4>"),
          verbatimTextOutput("code4"),
          
          HTML("<h3><b>Handle Inconsistency Data</b></h3>"),
          HTML("BMI.Category column has inconsistency in 'Normal' and 'Normal Weight' Data."),
          
          HTML("<h4><b>Code</b></h4>"),
          verbatimTextOutput("code5"),
          
          HTML("<h4><b>Dataset Structure</b></h4>"),
          verbatimTextOutput("str2"),
          
          HTML("<h4><b>Dataset</b></h4>"),
          DTOutput("dataset2")
        ),
        
        # Feature Engineering
        tabPanel(
          tags$b("Feature Engineering"),
          
          HTML("<h3><b>Separate Systolic and Diastolic Pressure</b></h3>"),
          
          HTML("<h4><b>Code</b></h4>"),
          verbatimTextOutput("code6"),
          
          HTML("<h3><b>Create Tension Categories</b></h3>"),
          
          HTML("<h4><b>Code</b></h4>"),
          verbatimTextOutput("code16"),
          
          HTML("<h3><b>Dataset</b></h3>"),
          DTOutput("dataset3")
        ),
        
        # Data Cleaning
        tabPanel(
          tags$b("Data Cleaning"),
          
          HTML("<h3><b>Check Missing Values</b></h3>"),
          
          HTML("<h4><b>Code</b></h4>"),
          verbatimTextOutput("code7"),
          
          HTML("<h4><b>Output</b></h4>"),
          verbatimTextOutput("checkmiss"),
          
          HTML("<b>Insight :
               </br>There are no missing values in the dataset.</b>"),
          
          HTML("<h3><b>Remove Duplicates</b></h3>"),
          
          HTML("<h4><b>Code</b></h4>"),
          verbatimTextOutput("code8"),
          
          HTML("<h3><b>Get Statistical Summary</b></h3>"),
          
          HTML("<h4><b>Code</b></h4>"),
          verbatimTextOutput("code9"),
          
          HTML("<h4><b>Output</b></h4>"),
          verbatimTextOutput("statsum"),
          
          HTML("<h3><b>Check Outliers</b></h3>"),
          
          HTML("<h4><b>Sleep Duration</br>Code</b></h4>"),
          verbatimTextOutput("code10"),
          
          HTML("<h4><b>Output</b></h4>"),
          plotlyOutput("plot1", height = "500px", width = "700px"),
          
          HTML("<h4><b>Daily Steps</br>Code</b></h4>"),
          verbatimTextOutput("code11"),
          
          HTML("<h4><b>Output</b></h4>"),
          plotlyOutput("plot2", height = "500px", width = "700px"),
          
          HTML("<h4><b>Heart Rate</br>Code</b></h4>"),
          verbatimTextOutput("code12"),
          
          HTML("<h4><b>Output</b></h4>"),
          plotlyOutput("plot3", height = "500px", width = "700px"),
          
          HTML("<h4><b>Physical Activity Level</br>Code</b></h4>"),
          verbatimTextOutput("code13"),
          
          HTML("<h4><b>Output</b></h4>"),
          plotlyOutput("plot4", height = "500px", width = "700px"),
          
          HTML("<h4><b>Stress Level</br>Code</b></h4>"),
          verbatimTextOutput("code14"),
          
          HTML("<h4><b>Output</b></h4>"),
          plotlyOutput("plot5", height = "500px", width = "700px"),
          
          HTML("<h4><b>Quality of Sleep</br>Code</b></h4>"),
          verbatimTextOutput("code15"),
          
          HTML("<h4><b>Output</b></h4>"),
          plotlyOutput("plot6", height = "500px", width = "700px"),
          
          HTML("<b>Insight :
          </br>There are some outliers in the data based on the chart we displayed. 
          </br>We group the data based on the Sleep Disorder because we consider it as the most compatible factor to gain insight from.
          </br>Since we are aiming to keep the originality of the data we are using, we won't be handling the outliers.</b></br></br></br></br></br>")
        )
      )
    ),
    
    # Data Visualization
    tabPanel(
      "Data Visualization",
      
      tabsetPanel(
        
        # Composition Graphs
        tabPanel(
          tags$b("Composition Graphs"),
          
          # Visualization 1
          HTML("<h3><b>Sleep Disorder Composition based on Occupation</b></h3>"),
          
          HTML("<h4><b>Code</b></h4>"),
          verbatimTextOutput("code17"),
          
          HTML("<h4><b>Output</b></h4>"),
          plotlyOutput("vis1", height = "700px", width = "980px"),
          
          HTML("<b>Insight : 
               </br>Sales Representative has the highest percentage of Sleep Apnea, while Salesperson has the highest percentage of Insomnia.</b>"),
          
          # Visualization 2
          HTML("<h3><b>BMI Category Composition based on Occupation</b></h3>"),
          
          HTML("<h4><b>Code</b></h4>"),
          verbatimTextOutput("code18"),
          
          HTML("<h4><b>Output</b></h4>"),
          plotlyOutput("vis2", height = "700px", width = "980px"),
          
          HTML("<b>Insight : 
               </br>Sales Representative has the highest percentage of Obese, while Salesperson, Manager, and Scientist has the highest percentage of Overweight.</b>"),
          
          # Visualization 3
          HTML("<h3><b>Sleep Disorder Composition based on BMI Category</b></h3>"),
          
          HTML("<h4><b>Code</b></h4>"),
          verbatimTextOutput("code19"),
          
          HTML("<h4><b>Output</b></h4>"),
          plotlyOutput("vis3", height = "700px", width = "980px"),
          
          HTML("<b>Insight : 
               </br>Normal weighted people are more likely to have no sleep disorder than the other BMI categories, and there are no single person with obese that has no sleep disorder.</b></br></br></br>")
        ),
          
        # Distribution Graphs
        tabPanel(
          tags$b("Distribution Graphs"),
          
          # Visualization 4
          HTML("<h3><b>Sleep Quality based on BMI Category</b></h3>"),
          
          HTML("<h4><b>Code</b></h4>"),
          verbatimTextOutput("code20"),
          
          HTML("<h4><b>Output</b></h4>"),
          plotlyOutput("vis4", height = "500px", width = "980px"),
          
          HTML("<b>Insight : 
               </br>Normal weighted people have the best quality of sleep.
               </br>People with the worst sleep quality are more likely to have higher BMI score/category.</b>"),
          
          # Visualization 5
          HTML("<h3><b>Sleep Duration based on BMI Category</b></h3>"),
          
          HTML("<h4><b>Code</b></h4>"),
          verbatimTextOutput("code21"),
          
          HTML("<h4><b>Output</b></h4>"),
          plotlyOutput("vis5", height = "500px", width = "980px"),
          
          HTML("<b>Insight : 
               </br>Normal weighted people has the healthiest duration of sleep.
               </br>People with shorter sleep duration are more likely to have higher BMI score/category.</b>"),
          
          # Visualization 6
          HTML("<h3><b>Age Distribution based on Sleep Disorder</b></h3>"),
          
          HTML("<h4><b>Code</b></h4>"),
          verbatimTextOutput("code22"),
          
          HTML("<h4><b>Output</b></h4>"),
          plotlyOutput("vis6", height = "500px", width = "980px"),
          
          HTML("<b>Insight : 
               </br>Older people are more likely to get Sleep Apnea than young people.</b></br></br></br>")
        )
      )
    ),
      
    # Correlation Analysis
    tabPanel(
      "Correlation Analysis",
      HTML("<h3><b>Sleep Health and Lifestyle Heatmap</b></h3>"),
      HTML("<h4><b>Code</b></h4>"),
      verbatimTextOutput("code24"),
          
      HTML("<h4><b>Output</b></h4>"),
      plotlyOutput("heatmap", height = "700px", width = "980px"),
      
      HTML("<b>Insight :
      </br>Positively correlated variables :
      </br>- Sleep Duration and Quality of Sleep
      </br>- Systolic and Diastolic Pressure
      </br>- Physical Activity Level and Daily Steps
      </br>Negatively correlated variables : 
      </br>- Stress Level and Quality of Sleep
      </br>- Stress Level and Sleep Duration
      </br>- Quality of Sleep and Heart Rate</b></br></br></br>")
    ),
      
    # Linear Regression
    tabPanel(
      "Linear Regression",
      
      # Variable pickers plus a submit button.
      sidebarPanel(
        
        selectInput("variable1", label = "X Axis:",
                    choices = numeric_columns),
        
        selectInput("variable2", label = "Y Axis:",
                    choices = numeric_columns),
        
        actionButton("submitbutton", "Submit", class = "btn btn-primary")
      ),
      mainPanel(
        plotlyOutput("visualization"),
        verbatimTextOutput("summary")
      )
    ),
    
    # Hypothesis Testing
    tabPanel(
      "Hypothesis Testing",
      
      # Variable pickers plus a submit button.
      sidebarPanel(
        
        selectInput("variable1h", label = "Dependent Variable :",
                    choices = numeric_columns),
        
        selectInput("variable2h", label = "Independent Variable :",
                    choices = numeric_columns),
        
        actionButton("submitbutton2", "Submit", class = "btn btn-primary")
      ),
      mainPanel(
        HTML("<h4><b>P-Value</b></h4>"),
        verbatimTextOutput("htest")
      )
    )
  )
)
server <- function(input, output) {
  output$code1 <- renderPrint({
  code <- 'df_sleep <- read.csv("Sleep_health_and_lifestyle_dataset.csv", sep=",", header = TRUE)\ndf_sleep'
    cat(code)
  })
  
  output$dataset <- renderDT({
    df_sleep
  })
  
  output$code2 <- renderPrint({
    code <- 'str(df_sleep)'
    cat(code)
  })
  
  output$str <- renderPrint({
    str(df_sleep)
  })
  
  output$code3 <- renderPrint({
    code <- 'df_sleep2 <- df_sleep %>% 
  select(-c(Person.ID))'
    cat(code)
  })
  
  output$code4 <- renderPrint({
    code <- 'df_sleep2$Gender <- df_sleep2$Gender %>%
  as.factor(.)
    
df_sleep2$Occupation <- df_sleep2$Occupation %>% 
  as.factor(.)
  
df_sleep2$BMI.Category <- df_sleep2$BMI.Category %>% 
  factor(., levels = c("Normal", "Normal Weight", "Overweight", "Obese"))
  
df_sleep$Sleep.Disorder <- df_sleep2$Sleep.Disorder %>% 
  factor(., levels = c("None", "Insomnia", "Sleep Apnea"))'
    cat(code)
  })
  
  output$code5 <- renderPrint({
    code <- 'df_sleep2 <- df_sleep2 %>%
  mutate(BMI.Category = recode(BMI.Category, "Normal Weight" = "Normal"))'
    cat(code)
  })
  
  output$str2 <- renderPrint({
    str(df_sleep2)
  })
  
  output$dataset2 <- renderDT({
    df_sleep2
  })
  
  output$code6 <- renderPrint({
    code <- 'df_sleep_new <- df_sleep2 %>% 
  separate_wider_delim(Blood.Pressure, delim = "/", names = c("Systolic.Pressure", "Diastolic.Pressure"))

df_sleep_new$Systolic.Pressure <- df_sleep_new$Systolic.Pressure %>% 
  as.integer(.)

df_sleep_new$Diastolic.Pressure <- df_sleep_new$Diastolic.Pressure %>%
  as.integer(.)'
    cat(code)
  })
  
  output$code16 <- renderPrint({
    code <- 'df_sleep_new <- df_sleep_new %>% 
  mutate(
    Tension.Category = case_when(
      Systolic.Pressure < 90 | Diastolic.Pressure < 60 ~ "Hypotension",
      Systolic.Pressure >= 90 & Systolic.Pressure <= 119 & Diastolic.Pressure >= 60 & Diastolic.Pressure <= 79 ~ "Normal",
      Systolic.Pressure > 90 | Diastolic.Pressure > 60 ~ "Hypertension"
    )
  )

df_sleep_new$Tension.Category <- df_sleep_new$Tension.Category %>% 
  as.factor(.)'
    cat(code)
  })
  
  output$dataset3 <- renderDT({
    df_sleep_new
  })
  
  output$code7 <- renderPrint({
    code <- 'sum(is.na(df_sleep_new))'
    cat(code)
  })
  
  output$checkmiss <- renderPrint({
    sum(is.na(df_sleep_new))
  })
  
  output$code8 <- renderPrint({
    code <- 'df_sleep_clean <- df_sleep_new %>% 
  distinct(.)'
    cat(code)
  })
  
  output$code9 <- renderPrint({
    code <- 'df_sleep_clean %>% 
  summary(.)'
    cat(code)
  })
  
  output$statsum <- renderPrint({
    df_sleep_clean %>% 
  summary(.)
  })
  
  output$code10 <- renderPrint({
    code <- 'chart_1 <- df_sleep_clean %>% 
  ggplot(mapping = aes(x = Sleep.Disorder,
                       y = Sleep.Duration)) +
  geom_boxplot() +
  labs(x = "Sleep Disorder", y = "Sleep Duration") +
  theme_minimal()

ggplotly(chart_1)'
    cat(code)
  })
  
  output$plot1 <- renderPlotly({
    ggplotly(chart_1)
  })
  
  output$code11 <- renderPrint({
    code <- 'chart_2 <- df_sleep_clean %>% 
  ggplot(mapping = aes(x = Sleep.Disorder,
                       y = Daily.Steps)) +
  geom_boxplot() +
  labs(x = "Sleep Disorder", y = "Daily Steps") +
  theme_minimal()

ggplotly(chart_2)'
    cat(code)
  })
  
  output$plot2 <- renderPlotly({
    ggplotly(chart_2)
  })
  
  output$code12 <- renderPrint({
    code <- 'chart_3 <- df_sleep_clean %>% 
  ggplot(mapping = aes(x = Sleep.Disorder,
                       y = Heart.Rate)) +
  geom_boxplot() +
  labs(x = "Sleep Disorder", y = "Heart Rate") +
  theme_minimal()

ggplotly(chart_3)'
    cat(code)
  })
  
  output$plot3 <- renderPlotly({
    ggplotly(chart_3)
  })
  
  output$code13 <- renderPrint({
    code <- 'chart_4 <- df_sleep_clean %>% 
  ggplot(mapping = aes(x = Sleep.Disorder,
                       y = Physical.Activity.Level)) +
  geom_boxplot() +
  labs(x = "Sleep Disorder", y = "Physical Activity Level") +
  theme_minimal()

ggplotly(chart_4)'
    cat(code)
  })
  
  output$plot4 <- renderPlotly({
    ggplotly(chart_4)
  })
  
  output$code14 <- renderPrint({
    code <- 'chart_5 <- df_sleep_clean %>% 
  ggplot(mapping = aes(x = Sleep.Disorder,
                       y = Stress.Level)) +
  geom_boxplot() +
  labs(x = "Sleep Disorder", y = "Stress Level") +
  theme_minimal()

ggplotly(chart_5)'
    cat(code)
  })
  
  output$plot5 <- renderPlotly({
    ggplotly(chart_5)
  })
  
  output$code15 <- renderPrint({
    code <- 'chart_6 <- df_sleep_clean %>% 
  ggplot(mapping = aes(x = Sleep.Disorder,
                       y = Quality.of.Sleep)) +
  geom_boxplot() +
  labs(x = "Sleep Disorder", y = "Quality of Sleep") +
  theme_minimal()

ggplotly(chart_6)'
    cat(code)
  })
  
  output$plot6 <- renderPlotly({
    ggplotly(chart_6)
  })
  
  output$code17 <- renderPrint({
    code <- 'vis_1 <- table(df_sleep_clean$Occupation, df_sleep_clean$Sleep.Disorder) %>% 
  as.data.frame(.) %>% 
  group_by(Var1) %>% 
  mutate(Total = sum(Freq),
         Percentage = Freq * 100 / Total) %>% 
  ungroup(.) %>% 
  ggplot(., aes(x = Var1, 
                y = Freq, 
                fill = Var2, 
                text = paste("Occupation :", Var1,
                             "<br>Sleep Disorder :", Var2,
                             "<br>Percentage :", round(Percentage, 1), "%"))) +
  geom_bar(position = "fill", 
           stat = "identity") +
  scale_y_continuous(labels = scales::percent_format()) +
  scale_fill_manual(values = c("#9BDF96", "#5D9D57", "#3C7237")) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 0,  
                                   vjust = 0.5,
                                   hjust = 0.5), 
        plot.margin = margin(t = 20)) +
  labs(title = "Percentage of Sleep Disorder per Occupation",  
       x = "Occupation", 
       y = "Percentage", 
       fill = "Sleep Disorder") +
  scale_x_discrete(labels = function(x) str_wrap(x, width = 10)) 

ggplotly(vis_1, tooltip = "text")'
    cat(code)
  })
  
  output$vis1 <- renderPlotly({
    ggplotly(vis_1, tooltip = "text")
  })
  
  # Code listing for vis_2 (BMI category share per occupation): prints the
  # snippet text verbatim; nothing in the string is evaluated here.
  output$code18 <- renderPrint({
    cat('vis_2 <- table(df_sleep_clean$Occupation, df_sleep_clean$BMI.Category) %>% 
  as.data.frame(.) %>% 
  group_by(Var1) %>% 
  mutate(Total = sum(Freq),
         Percentage = Freq * 100 / Total) %>% 
  ungroup(.) %>% 
  ggplot(., aes(x = Var1, 
                y = Freq, 
                fill = Var2, 
                text = paste("Occupation :", Var1,
                             "<br>BMI Category :", Var2,
                             "<br>Percentage :", round(Percentage, 1), "%"))) +
  geom_bar(position = "fill", 
           stat = "identity") +
  scale_y_continuous(labels = scales::percent_format()) +
  scale_fill_manual(values = c("#9BDF96", "#5D9D57", "#3C7237")) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 0, 
                                   vjust = 0.5,
                                   hjust = 0.5), 
        plot.margin = margin(t = 20)) +
  labs(title = "Percentage of BMI Category per Occupation",  
       x = "Occupation", 
       y = "Percentage", 
       fill = "BMI Category")+
    scale_x_discrete(labels = function(x) str_wrap(x, width = 10)) 

ggplotly(vis_2, tooltip = "text")')
  })
  
  # Interactive version of vis_2; hover shows only the custom "text" aesthetic.
  output$vis2 <- renderPlotly({
    vis_2 %>% ggplotly(tooltip = "text")
  })
  
  # Code listing for vis_3 (sleep disorder share per BMI category): prints the
  # snippet text verbatim; nothing in the string is evaluated here.
  output$code19 <- renderPrint({
    cat('vis_3 <- table(df_sleep_clean$BMI.Category, df_sleep_clean$Sleep.Disorder) %>% 
  as.data.frame(.) %>% 
  group_by(Var1) %>% 
  mutate(Total = sum(Freq),
         Percentage = Freq * 100 / Total) %>% 
  ungroup(.) %>% 
  ggplot(., aes(x = Var1, 
                y = Freq, 
                fill = Var2, 
                text = paste("BMI Category :", Var1,
                             "<br>Sleep Disorder :", Var2,
                             "<br>Percentage :", round(Percentage, 1), "%"))) +
  geom_bar(position = "fill", 
           stat = "identity") +
  scale_y_continuous(labels = scales::percent_format()) +
  scale_fill_manual(values = c("#9BDF96", "#5D9D57", "#3C7237")) +
  theme_minimal() +
  labs(title = "Percentage of Sleep Disorder per BMI Category",  
       x = "BMI Category", 
       y = "Percentage", 
       fill = "Sleep Disorder")

ggplotly(vis_3, tooltip = "text")')
  })
  
  # Interactive version of vis_3; hover shows only the custom "text" aesthetic.
  output$vis3 <- renderPlotly({
    vis_3 %>% ggplotly(tooltip = "text")
  })
  
  # Code listing for vis_4 (quality of sleep by BMI category): prints the
  # snippet text verbatim; nothing in the string is evaluated here.
  output$code20 <- renderPrint({
    cat('vis_4 <- df_sleep_clean %>% 
  ggplot(., aes(x = BMI.Category,
                y = Quality.of.Sleep,
                fill = BMI.Category)) +
  geom_boxplot(width = 5) +
  coord_flip() +
  scale_fill_manual(values = c("#9BDF96", "#5D9D57", "#3C7237")) +
  theme_minimal() +
  labs(title = "Quality of Sleep based on BMI Category",
       x = "BMI Category",
       y = "Quality of Sleep",
       fill = "BMI Category") +
  expand_limits(x = 0, y = 0)

ggplotly(vis_4)')
  })
  
  # Interactive version of vis_4 (object defined outside this block).
  output$vis4 <- renderPlotly({
    vis_4 %>% ggplotly()
  })
  
  # Code listing for vis_5 (sleep duration by BMI category): prints the
  # snippet text verbatim; nothing in the string is evaluated here.
  output$code21 <- renderPrint({
    cat('vis_5 <- df_sleep_clean %>% 
  ggplot(., aes(x = BMI.Category,
                y = Sleep.Duration,
                fill = BMI.Category)) +
  geom_boxplot(width = 5) +
  coord_flip() +
  scale_fill_manual(values = c("#9BDF96", "#5D9D57", "#3C7237")) +
  theme_minimal() +
  labs(title = "Sleep Duration based on BMI Category",  
       x = "BMI Category", 
       y = "Sleep Duration",
       fill = "BMI Category") +
  expand_limits(x = 0, y = 0)

ggplotly(vis_5)')
  })
  
  # Interactive version of vis_5 (object defined outside this block).
  output$vis5 <- renderPlotly({
    vis_5 %>% ggplotly()
  })
  
  # Code listing for vis_6 (age distribution by sleep disorder): prints the
  # snippet text verbatim; nothing in the string is evaluated here.
  output$code22 <- renderPrint({
    cat('vis_6 <- df_sleep_clean %>% 
  ggplot(., aes(x = Sleep.Disorder,
                y = Age,
                fill = Sleep.Disorder)) +
  geom_boxplot(width = 5) +
  coord_flip() +
  scale_fill_manual(values = c("#9BDF96", "#5D9D57", "#3C7237")) +
  theme_minimal() +
  labs(title = "Age Distribution based on Sleep Disorder",  
       x = "Sleep Disorder", 
       y = "Age",
       fill = "Sleep Disorder") +
  expand_limits(x = 0, y = 0)

ggplotly(vis_6)')
  })
  
  # Interactive version of vis_6 (object defined outside this block).
  output$vis6 <- renderPlotly({
    vis_6 %>% ggplotly()
  })
  
  # Code listing for the correlation heatmap: prints the snippet text verbatim;
  # nothing in the string is evaluated here.
  output$code24 <- renderPrint({
    # The listing sits inside an R string literal, so every backslash that
    # should appear on screen has to be doubled once more: "\\\\." below is
    # emitted as "\\." (a valid regex literal for a dot). With only "\\." the
    # app displayed "\.", which is not valid R and cannot be copy-pasted.
    code <- 'heatmap <- df_sleep_clean %>% 
  select_if(is.numeric) %>% 
  cor() %>% 
  melt() %>% 
  ggplot(aes(x = Var1,
             y = Var2,
             fill = value,
             label = round(value, 2),
             text = paste("Variable 1 :", Var1,
                          "<br>Variable 2 :", Var2,
                          "<br>Correlation Coefficient :", round(value, 2)))) +
  geom_tile(color = "white") +
  geom_text(color = "black") +
  scale_fill_gradient2(low = "#FF751A",
                       mid = "white",
                       high = "#3C7237", 
                       midpoint = 0,
                       limit = c(-1, 1),
                       space = "Lab",
                       name = "Correlation") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 0, 
                                   vjust = 0.5,  
                                   hjust = 0.5), 
        axis.text.y = element_text(hjust = 1)) +
  labs(title = "Correlation Heatmap",  
       x = "", 
       y = "") +
  scale_x_discrete(labels = function(x) str_wrap(str_replace_all(x, "\\\\.", " "), width = 10)) +
  scale_y_discrete(labels = function(x) str_wrap(str_replace_all(x, "\\\\.", " "), width = 10))

ggplotly(heatmap, tooltip = "text")'
    cat(code)
  })
  
  # Interactive version of the `heatmap` object (defined outside this block);
  # hover shows only the custom "text" aesthetic.
  output$heatmap <- renderPlotly({
    heatmap %>% ggplotly(tooltip = "text")
  })
  
  # On each click of input$submitbutton: draw a scatter plot with a fitted
  # straight line and print the summary of the matching linear model.
  observeEvent(input$submitbutton, {
    # Snapshot the selections at click time so both outputs describe the same
    # pair of variables. variable1 is the response and variable2 the predictor,
    # exactly as in the model formula below.
    response <- input$variable1
    predictor <- input$variable2
    req(response, predictor)

    # The response goes on the y axis so that the line drawn by geom_smooth()
    # (which fits y ~ x) is the same regression as the printed summary.
    # Previously the axes were swapped relative to the formula, so the plot
    # and the summary showed two different models.
    # `!!sym()` replaces the deprecated aes_string().
    reg <- df_sleep_clean %>%
      ggplot(aes(x = !!sym(predictor),
                 y = !!sym(response))) +
      geom_jitter(color = "#5D9D57") +
      geom_smooth(method = "lm",
                  se = FALSE,
                  color = "red") +
      labs(title = paste("Linear Regression", response, "vs", predictor),
           x = predictor,
           y = response)

    output$visualization <- renderPlotly({
      ggplotly(reg)
    })

    # Fit inside the render function so a fitting error is reported in the
    # summary output instead of being raised from the observer.
    output$summary <- renderPrint({
      formula <- as.formula(paste(response, "~", predictor))
      model <- lm(formula, data = df_sleep_clean)
      summary(model)
    })
  })
  
  # On each click of input$submitbutton2: regress variable1h on variable2h and
  # report the predictor's p-value together with a verdict at the 0.05 level.
  observeEvent(input$submitbutton2, {
    # Snapshot the selections at click time. Reading input$... inside the
    # render function (as before) made the result re-render on every change of
    # the selectors after the first click, bypassing the submit button.
    response <- input$variable1h
    predictor <- input$variable2h
    req(response, predictor)

    output$htest <- renderPrint({
      model_formula <- as.formula(paste(response, "~", predictor))
      fit <- lm(model_formula, data = df_sleep_clean)
      coef_table <- summary(fit)$coefficients

      # There is no row named exactly like the predictor when, for example,
      # the same variable was picked on both sides; show a message instead of
      # failing with "subscript out of bounds".
      validate(need(
        predictor %in% rownames(coef_table),
        paste("No coefficient was estimated for", predictor)
      ))

      p_value <- coef_table[predictor, "Pr(>|t|)"]
      print(p_value)

      if (p_value < 0.05) {
        print(paste(predictor, "affects", response))
      } else {
        print(paste(predictor, "does not affect", response))
      }
    })
  })
  
  
}
# Assemble the app from the `ui` and `server` objects.
shinyApp(ui = ui, server = server)
Shiny applications not supported in static R Markdown documents